from lxml import etree
import requests


# Fetch the page at `url`, extract each <ul> under <div class="bottom">, and
# write one section per <ul> to ./city.txt (a heading line followed by the
# link texts of that group).
url = 'https://www.aqistudy.cn/historydata/'
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0'
}

# Without a timeout, requests waits forever on a stalled connection.
resp = requests.get(url, headers=headers, timeout=10)
# Fail loudly on HTTP errors instead of parsing an error page and silently
# producing an empty or misleading output file.
resp.raise_for_status()
# NOTE(review): .text decodes using the charset requests infers from the
# response headers — confirm the server declares the right encoding.
response = resp.text
tree = etree.HTML(response)

ul_list = tree.xpath('//div[@class="bottom"]/ul')

# The context manager guarantees the file is closed even if an XPath query
# or a write raises part-way through the loop.
with open('./city.txt', 'w', encoding='utf-8') as f:
    for ul in ul_list:
        # Text of the <b> tag in the first child <div>; use the first match
        # if present, otherwise an empty string.
        # (presumably the initial-letter heading of the group — verify
        # against the page markup)
        zm = ul.xpath('./div[1]/b/text()')
        zm_text = zm[0] if zm else ''

        # Text of every <a> tag anywhere under the second child <div>.
        cs = ul.xpath('./div[2]//a/text()')
        # Join the entries into a single line, separated by four spaces.
        cs_text = '    '.join(cs)

        # One section per <ul>: heading, blank line, entries, then three
        # blank lines before the next section.
        f.write(zm_text + '\n\n' + cs_text + '\n\n\n\n')
